<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>OpenAI Prompt Caching Playground</title>
<style>
  /* Theme tokens shared by every rule below. */
  :root {
    --bg:#0d1117; --panel:#161b22; --ink:#e6edf3; --muted:#9da7b3; --accent:#3fb950; --warn:#f0883e; --border:#30363d;
  }

  /* Page chrome */
  html,body {background:var(--bg); color:var(--ink); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji"; margin:0; padding:0;}
  header {padding:16px 20px; border-bottom:1px solid var(--border); position:sticky; top:0; background:rgba(13,17,23,.9); backdrop-filter: blur(6px);}
  h1 {font-size:18px; margin:0 0 6px;}
  .sub {color:var(--muted); font-size:13px;}

  /* Two-column layout: fixed-width controls on the left, results on the right. */
  .wrap {display:grid; grid-template-columns: 360px 1fr; gap:16px; padding:16px;}
  .panel {background:var(--panel); border:1px solid var(--border); border-radius:10px; padding:14px;}
  .panel h2 {font-size:14px; margin:0 0 10px; color:#c9d1d9; letter-spacing:.2px;}

  /* Form controls */
  textarea, input, select {background:#0b0f14; color:var(--ink); border:1px solid var(--border); border-radius:8px; padding:8px; width:100%; box-sizing:border-box; font: inherit;}
  textarea {min-height:110px; resize:vertical; line-height:1.35;}
  .row {display:flex; gap:8px; align-items:center;}
  .row > * {flex:1;}
  .row .shrink {flex:0 0 auto;}
  .btn {background:#21262d; border:1px solid var(--border); padding:8px 10px; border-radius:8px; cursor:pointer; color:var(--ink);}
  .btn:hover {border-color:#6e7681;}
  .btn.primary {background:var(--accent); color:#031d0b; border-color:#2ea043; font-weight:600;}
  .btn.ghost {background:transparent;}
  .grid-2 {display:grid; grid-template-columns:1fr 1fr; gap:8px;}

  /* Result stats. auto-fit lets the six tiles wrap onto extra rows when the
     panel is narrower than 6 × 90px instead of overflowing horizontally; on
     wide panels the empty tracks collapse and the tiles still share one row. */
  .stats {display:grid; grid-template-columns: repeat(auto-fit, minmax(90px, 1fr)); gap:8px; margin-top:8px;}
  .stat {background:#0b0f14; border:1px dashed var(--border); border-radius:8px; padding:8px;}
  .stat .k {font-size:11px; color:var(--muted);}
  .stat .v {font-size:16px; font-weight:700; margin-top:4px;}

  /* Status badges; .hit / .miss are toggled from script. */
  .badge {display:inline-block; padding:2px 8px; border-radius:999px; font-size:12px; border:1px solid var(--border); background:#0b0f14; color:var(--muted);}
  .badge.hit {background:rgba(63,185,80,.12); color:#56d364; border-color:#2ea043;}
  .badge.miss {background:rgba(240,136,62,.12); color:#f0883e; border-color:#8a5700;}

  /* Monospace output panes */
  .log {font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size:12px; white-space:pre-wrap; background:#0b0f14; border:1px solid var(--border); border-radius:8px; padding:10px; max-height:260px; overflow:auto;}
  .out {background:#0b0f14; border:1px solid var(--border); border-radius:8px; padding:10px; min-height:140px; font-family: ui-monospace, Menlo, Consolas, monospace; white-space:pre-wrap;}

  /* Raw JSON sections */
  .code {
    background:#0b0f14; border:1px solid var(--border);
    border-radius:8px; padding:10px;
    font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
    font-size:12px; line-height:1.5; white-space:pre-wrap;
    max-height:260px; overflow:auto;
  }
  details summary { cursor:pointer; user-select:none; }
  details { margin:8px 0; }

  /* Small utilities */
  .footer-note {color:var(--muted); font-size:12px;}
  .inline {display:inline-flex; align-items:center; gap:8px;}
  .sep {height:1px; background:var(--border); margin:10px 0;}

  /* Narrow viewports: stack the two panels. */
  @media (max-width: 980px){ .wrap{grid-template-columns:1fr;} }
</style>
</head>
<body>
<header>
  <h1>Prompt Caching Playground</h1>
  <div class="sub">
    Optimize your prompt structure and observe cache hits. Cache engages at <strong>≥ 1024 tokens</strong>, with cacheable chunks in <strong>128-token</strong> increments (e.g., 1024, 1152, 1280…).
  </div>
</header>

<div class="wrap">
  <!-- LEFT: Controls -->
  <div class="panel">
    <h2>Setup</h2>
    <!-- These controls have no visible label (the compact .row layout gives
         every child equal flex space), so each gets an aria-label to provide
         an accessible name; a placeholder alone is not a label. -->
    <div class="row">
      <select id="endpoint" aria-label="API endpoint">
        <option value="chat">Chat Completions API</option>
        <option value="responses">Responses API (supports prompt_cache_key)</option>
      </select>
      <select id="model" aria-label="Model">
        <option value="gpt-4o-mini">gpt-4o-mini</option>
        <option value="gpt-4o">gpt-4o</option>
        <option value="gpt-4.1-mini">gpt-4.1-mini</option>
      </select>
    </div>

    <div class="row" style="margin-top:8px;">
      <input id="cacheKey" placeholder="prompt_cache_key (Responses API only)" aria-label="prompt_cache_key (Responses API only)" />
    </div>

    <div class="row" style="margin-top:8px;">
      <input id="scenarioName" placeholder="Scenario label (for the log)" aria-label="Scenario label (for the log)" />
      <select id="scenarioPicker" class="shrink" aria-label="Demo scenario">
        <option value="starter">Load demo scenario…</option>
        <option value="A">A) Static instructions + Doc A + Q1</option>
        <option value="A2">A2) Same instructions + Doc A + Q2</option>
        <option value="B">B) Same instructions + Doc B + Q1</option>
      </select>
      <button class="btn" id="loadScenarioBtn" type="button">Load</button>
    </div>

    <div class="sep"></div>

    <div class="row">
      <label class="inline">
        Repeat instructions
        <input type="number" id="repeatInstructions" min="1" value="1" style="width:90px" />
      </label>
      <label class="inline">
        Repeat doc
        <input type="number" id="repeatDoc" min="1" value="1" style="width:90px" />
      </label>
    </div>
    <div class="footer-note" style="margin-top:6px;">
      Use repeats to push your prompt over 1024 tokens (and observe cached increments).
    </div>

    <div class="sep"></div>

    <h2>Prompt Parts</h2>
    <!-- Each label is bound to its textarea with for/id so clicking the label
         focuses the field and assistive technology announces its name. -->
    <label class="footer-note" for="instructions">Initial Instructions (static; put these first for best cache hit rate)</label>
    <textarea id="instructions"></textarea>

    <label class="footer-note" for="doc" style="margin-top:8px; display:block;">Document Context (semi-static; keep above user input)</label>
    <textarea id="doc"></textarea>

    <label class="footer-note" for="question" style="margin-top:8px; display:block;">User Question (dynamic; place last)</label>
    <textarea id="question"></textarea>

    <div class="sep"></div>

    <div class="grid-2">
      <button class="btn primary" id="sendBtn">Send</button>
      <button class="btn" id="sendVariantBtn">Send Variant (new Q)</button>
    </div>
    <div class="row" style="margin-top:8px;">
      <button class="btn ghost" id="clearLogBtn">Clear Log</button>
      <button class="btn ghost" id="copyLogBtn">Copy Log</button>
    </div>
    <div class="footer-note" style="margin-top:8px;">
      Tip: Try A → A2 (same prefix, new question) to see cache hits; then A → B to see instruction-only prefix hits.
    </div>
  </div>

  <!-- RIGHT: Results -->
  <div class="panel">
    <h2>Result</h2>
    <div class="row">
      <span id="hitBadge" class="badge">cache status</span>
      <span id="timeBadge" class="badge">latency</span>
      <span id="tsBadge" class="badge">timestamp</span>
    </div>

    <div class="stats">
      <div class="stat"><div class="k">Prompt Tokens</div><div class="v" id="promptTokens">–</div></div>
      <div class="stat"><div class="k">Cached Tokens</div><div class="v" id="cachedTokens">–</div></div>
      <div class="stat"><div class="k">Cache Hit %</div><div class="v" id="hitPct">–</div></div>
      <div class="stat"><div class="k">Completion Tokens</div><div class="v" id="completionTokens">–</div></div>
      <div class="stat"><div class="k">Total Tokens</div><div class="v" id="totalTokens">–</div></div>
      <div class="stat"><div class="k">Model</div><div class="v" id="modelEcho">–</div></div>
    </div>

    <div class="sep"></div>

    <div class="row">
      <div class="stat" style="flex:1;">
        <div class="k">Endpoint</div>
        <div class="v" id="endpointEcho">–</div>
      </div>
      <div class="stat" style="flex:1;">
        <div class="k">prompt_cache_key</div>
        <div class="v" id="pckEcho" title="Only used on Responses API">–</div>
      </div>
    </div>

    <div class="sep"></div>
    <div class="out" id="output">Response will appear here…</div>

    <div class="sep"></div>
    <h2>Raw JSON</h2>
    <details open id="reqDetails">
      <summary>Most recent request JSON</summary>
      <pre id="reqJson" class="code">—</pre>
    </details>
    <details id="resDetails">
      <summary>Most recent response JSON</summary>
      <pre id="resJson" class="code">—</pre>
    </details>

    <div class="sep"></div>
    <h2>Run Log</h2>
    <div class="log" id="runLog"></div>
  </div>
</div>

<script>
/**
 * Minimal, in-page "localStorage prompt" API key flow.
 *
 * Reads the key stored under `openai_api_key`; when none is stored (or the
 * stored value is blank) asks the user via `prompt()` and persists a non-empty
 * answer. The key is trimmed on both paths so stray whitespace from a paste
 * never ends up in the Authorization header or in storage.
 *
 * @returns {string|null} The trimmed key, or null when none is available
 *   (dialog cancelled or blank input).
 */
function getAPIKey() {
  const STORAGE_KEY = 'openai_api_key';
  let apiKey = (localStorage.getItem(STORAGE_KEY) || '').trim();
  if (!apiKey) {
    // prompt() returns null when the dialog is cancelled.
    apiKey = (prompt('Please enter your OpenAI API Key:') || '').trim();
    if (apiKey) {
      localStorage.setItem(STORAGE_KEY, apiKey);
    }
  }
  return apiKey || null;
}

/** Helpers */
/** Shorthand element lookup: returns the element whose id is `elementId`. */
const $ = (elementId) => {
  return document.getElementById(elementId);
};
/**
 * Timestamp for log lines: the locale-formatted current time (24-hour clock)
 * followed by the ISO-8601 form of the same instant in parentheses.
 */
function nowISO() {
  const stamp = new Date();
  const localPart = stamp.toLocaleString(undefined, { hour12: false });
  return `${localPart} (${stamp.toISOString()})`;
}
/** Append `txt` plus a newline to the #runLog pane and scroll it to the bottom. */
function logLine(txt) {
  const logPane = $('runLog');
  logPane.textContent = logPane.textContent + (txt + '\n');
  // Keep the newest entry in view.
  logPane.scrollTop = logPane.scrollHeight;
}
/**
 * Set a badge's text and state class.
 * Any existing `hit` / `miss` class is removed first; `kind` (when truthy)
 * is then added as the new state class.
 */
function setBadge(el, text, kind) {
  el.textContent = text;
  for (const stateClass of ['hit', 'miss']) {
    el.classList.remove(stateClass);
  }
  if (!kind) return;
  el.classList.add(kind);
}
/**
 * Render a value as 2-space-indented JSON.
 * Falls back to `String(obj)` when JSON.stringify throws (e.g. circular data).
 */
function pretty(obj) {
  let rendered;
  try {
    rendered = JSON.stringify(obj, null, 2);
  } catch {
    rendered = String(obj);
  }
  return rendered;
}
/**
 * Show the outgoing request in the #reqJson pane.
 * Only the URL and payload are rendered; headers (and therefore the API key)
 * are deliberately left out.
 */
function setReqJson(url, payload) {
  const requestPane = $('reqJson');
  requestPane.textContent = pretty({ url, payload });
}
/** Show the most recent response body, pretty-printed, in the #resJson pane. */
function setResJson(data) {
  const responsePane = $('resJson');
  responsePane.textContent = pretty(data);
}

/**
 * Demo scenarios (2× length).
 *
 * `instructions` is the shared static prefix; `docA` / `docB` are two
 * alternative document contexts; `q1A`, `q2A` and `q1B` are the user
 * questions paired with them by loadScenario(). Each long text ends with
 * generated filler lines to raise its token count.
 */
const demo = {
  instructions: `You are a highly concise assistant. Always answer in 1–3 sentences unless explicitly asked for more. Use plain language. If the user asks for code, include only code unless told otherwise.

General rules:
- If asked to compare, list key differences as short bullets.
- If a calculation is needed, show the equation then the result.
- If a definition is requested, give a crisp one-liner first.
- Prefer step-by-step logic but keep it terse; avoid filler.
- Cite assumptions when they influence the answer.

Style guide:
- Avoid hedging like “it seems.” Be direct when evidence supports it.
- Use simple words; avoid jargon unless the question is technical.
- Bullets over paragraphs when listing 3+ items.
- For numbers: include units and orders of magnitude when relevant.

Math & formatting:
- Show one compact formula, then compute.
- Round sensibly; default to 2–3 sig figs unless precision matters.
- Monospace code blocks for code only; no prose inside code fences.

Safety rails:
- Decline disallowed content with a brief reason and a safer alternative.
- Don’t fabricate citations, links, or data you can’t verify.

Edge cases:
- If the question is ambiguous, answer the most common interpretation and note the alternative in one line.
- If insufficient data, state what’s missing and provide a minimal actionable next step.

Examples (for length and reference):
Q: Define latency vs throughput.
A: Latency is per-request delay; throughput is requests/second. They trade off: batching raises throughput but can add latency.

Q: Summarize a spec into 3 bullets.
A: Goal, core objects, critical risks (1 line each).

Q: Show a quick ROI calc.
A: ROI = (benefit − cost)/cost. With $50k benefit and $20k cost, ROI = (50−20)/20 = 1.5 = 150%.

When code is requested:
- Provide a minimal runnable snippet.
- Include a 1-line comment on how to run or integrate.

Tools you could hypothetically use (for realism in token length): web.run (search, open, click), python (analysis), image_query (images for people/places). These tool names are illustrative for prompt length; they don’t execute here.

Glossary (compact):
- CRDT: conflict-free replicated data type.
- Event sourcing: store events then derive state from them.

Extended guidance filler to lift token count:
${'Guideline: Prefer clear, literal phrasing; keep answers scoped; surface assumptions explicitly; show one example when helpful.\n'.repeat(60)}
(End of static instructions.)`,

  // The filler expression is parenthesized so the bullet prefix is repeated on
  // every line; `.repeat()` binds tighter than `+` and would otherwise apply
  // to the second string only.
  docA: `Product Spec: Nimbus Notes
- Goal: Lightning-fast note capture with offline-first sync.
- Core objects: Notebook, Note, Tag, Attachment.
- Sync model: CRDT-based; conflict-free merges.
- Pricing: Free; Pro at $4/month with 10GB attachments.
- Roadmap Q4: iOS widgets, Android share sheet revamp, web clipper.

Architecture notes:
- Local-first write path, background sync, deterministic conflict resolution.
- Indexing: inverted index on device; server compaction nightly.
- Export: Markdown + attachments in a flat bundle.

Risks:
- Complex merge semantics for rich text.
- Battery usage on large notebooks during background sync.

Extra long filler to increase tokens:
${('• Feature: ' + 'rich text, backlinks, slash commands, export to Markdown.\n').repeat(80)}`,

  // Same parenthesized filler as docA, for the same precedence reason.
  docB: `Product Spec: Zephyr Tasks
- Goal: Kanban-like tasking for small teams, built on ActivityPub.
- Core objects: Board, Column, Card, Checklist, Comment.
- Sync model: Event-sourced with snapshotting.
- Pricing: Free; Team at $6/user/month, custom SSO.
- Roadmap Q4: Gantt view, email in, calendar sync.

Architecture notes:
- Federated updates via ActivityPub; per-tenant queues.
- Snapshot intervals tuned to active column size.
- Automation hooks for webhooks and email ingestion.

Risks:
- Federation consistency and moderation boundaries.
- Notification overload without good defaults.

Extra long filler to increase tokens:
${('• Capability: ' + 'labels, due dates, swimlanes, WIP limits, automations.\n').repeat(80)}`,

  q1A: `What are the minimum moving parts I need to build first MVP?`,
  q2A: `What are the top three risks for this approach?`,
  q1B: `What metrics should we track in the first month?`,
};

/**
 * Populate the instructions, document, question and scenario-label fields
 * from a demo preset.
 *
 * @param {string} which 'A', 'A2' or 'B'; any other value loads the starter
 *   preset (Doc A + Q1, labelled "Starter").
 */
function loadScenario(which) {
  const starter = { doc: demo.docA, question: demo.q1A, label: 'Starter' };
  // A Map (rather than a plain object) so only the listed keys ever match.
  const presets = new Map([
    ['A', { doc: demo.docA, question: demo.q1A, label: 'A (Instr + Doc A + Q1)' }],
    ['A2', { doc: demo.docA, question: demo.q2A, label: 'A2 (Instr + Doc A + Q2)' }],
    ['B', { doc: demo.docB, question: demo.q1B, label: 'B (Instr + Doc B + Q1)' }],
  ]);
  const preset = presets.get(which) ?? starter;

  // Every scenario shares the same static instructions.
  $('instructions').value = demo.instructions;
  $('doc').value = preset.doc;
  $('question').value = preset.question;
  $('scenarioName').value = preset.label;
}
// "Load" button: apply whichever preset is currently selected in the picker.
$('loadScenarioBtn').addEventListener('click', () => {
  const picked = $('scenarioPicker').value;
  loadScenario(picked);
});

/**
 * Build messages with static-first ordering (to maximize cache hits).
 *
 * Reads the instructions, document and question fields plus the two repeat
 * counts. The instructions (repeated, joined by blank lines) become the
 * system message; the document copies (each prefixed "# Document Copy N")
 * followed by the question become the user message.
 *
 * @returns {{messages: Array<{role: string, content: string}>, system: string, user: string}}
 */
function buildMessages() {
  // Turn a repeat field into an integer >= 1. Number() is used instead of
  // parseInt() because a number input can hold values such as ".5" (parseInt
  // yields NaN, which would produce zero copies) or "1e2" (parseInt stops at
  // the "e" and yields 1).
  const toRepeatCount = (raw) => {
    const n = Math.floor(Number(raw));
    return Number.isFinite(n) && n >= 1 ? n : 1;
  };

  const instr = $('instructions').value;
  const doc = $('doc').value;
  const question = $('question').value;
  const repI = toRepeatCount($('repeatInstructions').value);
  const repD = toRepeatCount($('repeatDoc').value);

  const instrRepeated = Array.from({length: repI}, () => instr).join('\n\n');
  const docRepeated = Array.from({length: repD}, (_, i) => `# Document Copy ${i+1}\n` + doc).join('\n\n');

  const system = instrRepeated.trim();
  // The question goes last so everything before it stays a stable prefix.
  const user = (`Document Context:\n${docRepeated}\n\nUser Question:\n${question}`).trim();

  const messages = [
    {role:'system', content: system},
    {role:'user', content: user},
  ];
  return {messages, system, user};
}

/**
 * Build the request URL and JSON payload for the selected endpoint.
 *
 * @param {string} endpointSel 'responses' for the Responses API; anything else
 *   selects Chat Completions.
 * @param {string} model Model id.
 * @param {Array<{role: string, content: string}>} messages Conversation items.
 * @param {string|null} pck Optional prompt_cache_key (sent on Responses only).
 * @returns {{url: string, payload: object}}
 */
function buildRequest(endpointSel, model, messages, pck) {
  if (endpointSel === 'responses') {
    return {
      url: 'https://api.openai.com/v1/responses',
      payload: {
        model,
        // The Responses API expects the conversation under `input`, not `messages`.
        input: messages,
        // undefined is dropped by JSON.stringify, so the field is omitted when unset.
        prompt_cache_key: pck || undefined,
        temperature: 0.2,
      },
    };
  }
  return {
    url: 'https://api.openai.com/v1/chat/completions',
    payload: { model, messages, temperature: 0.2 },
  };
}

/**
 * Pull the output text and token counts out of a response body, accepting the
 * field names of both APIs (Chat Completions: prompt_tokens / completion_tokens /
 * prompt_tokens_details; Responses: input_tokens / output_tokens /
 * input_tokens_details).
 *
 * @returns {{text: string, promptTokens: (number|string), completionTokens: (number|string),
 *            totalTokens: (number|string), cached: number}} Missing counts are '—';
 *   `cached` is 0 when the response carries no cached_tokens number.
 */
function normalizeResult(endpointSel, data) {
  let text = '';
  if (endpointSel === 'responses') {
    text = data.output_text ?? '';
    if (!text && Array.isArray(data.output)) {
      // Concatenate every output_text part of every output item.
      text = data.output
        .map((item) => Array.isArray(item?.content)
          ? item.content.map((part) => (part.type === 'output_text' && part.text) ? part.text : '').join('')
          : '')
        .join('')
        .trim();
    }
  } else {
    text = data.choices?.[0]?.message?.content || '';
  }

  const usage = data.usage || {};
  const details = usage.prompt_tokens_details || usage.input_tokens_details || {};
  return {
    text,
    promptTokens: usage.prompt_tokens ?? usage.input_tokens ?? '—',
    completionTokens: usage.completion_tokens ?? usage.output_tokens ?? '—',
    totalTokens: usage.total_tokens ?? '—',
    cached: (typeof details.cached_tokens === 'number') ? details.cached_tokens : 0,
  };
}

/**
 * Core send function (supports Chat Completions and Responses API).
 *
 * Builds the request from the form, POSTs it, then updates the badges, stats,
 * output pane, raw-JSON panes and run log. HTTP error responses are reported
 * as errors instead of being scored as a cache miss.
 *
 * @param {string} kind 'variant' appends a fixed suffix to the question field
 *   before sending; any other value sends the form as-is.
 * @returns {Promise<void>}
 */
async function send(kind) {
  const apiKey = getAPIKey();
  if (!apiKey) return;

  // Optionally mutate the user question for the "variant" button
  if (kind === 'variant') {
    $('question').value = $('question').value + ' (Please keep it to 3 bullets.)';
  }

  const endpointSel = $('endpoint').value;
  const model = $('model').value;
  const pck = $('cacheKey').value.trim() || null;
  const label = $('scenarioName').value.trim() || '(unnamed)';

  const {messages} = buildMessages();
  const {url, payload} = buildRequest(endpointSel, model, messages, pck);
  const headers = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiKey}`,
  };

  // Show outgoing request snapshot (no headers / key) and reset response pane
  setReqJson(url, payload);
  $('resJson').textContent = '—';

  // UI pre-state
  setBadge($('hitBadge'), '…', null);
  setBadge($('timeBadge'), '–', null);
  setBadge($('tsBadge'), new Date().toLocaleTimeString(), null);
  $('output').textContent = 'Waiting for response…';
  $('promptTokens').textContent = '–';
  $('cachedTokens').textContent = '–';
  $('hitPct').textContent = '–';
  $('completionTokens').textContent = '–';
  $('totalTokens').textContent = '–';
  $('modelEcho').textContent = model;
  $('endpointEcho').textContent = endpointSel;
  $('pckEcho').textContent = pck || '—';

  const t0 = performance.now();

  try {
    const resp = await fetch(url, {method:'POST', headers, body: JSON.stringify(payload)});
    // Latency is measured up to the point fetch() resolves, before the body is read.
    const latencyMs = Math.round(performance.now() - t0);

    // Read as text first so a non-JSON body (e.g. a gateway error page) is
    // still shown rather than surfacing only as a JSON parse error.
    const raw = await resp.text();
    let data;
    try { data = JSON.parse(raw) ?? {}; }
    catch { data = { raw }; }
    setResJson(data);

    setBadge($('timeBadge'), `${latencyMs} ms`, null);
    setBadge($('tsBadge'), new Date().toLocaleTimeString(), null);

    if (!resp.ok) {
      const reason = data?.error?.message || resp.statusText || 'request failed';
      // A rejected key would otherwise be reused on every later request with
      // no way to re-enter it; dropping it makes the next send prompt again.
      if (resp.status === 401) localStorage.removeItem('openai_api_key');
      $('output').textContent = `Error ${resp.status}: ${reason}`;
      setBadge($('hitBadge'), `error ${resp.status}`, 'miss');
      logLine(`[${nowISO()}] ERROR label="${label}" endpoint=${endpointSel} model=${model} status=${resp.status} ${reason}`);
      return;
    }

    // Extract output text + usage across both APIs
    const result = normalizeResult(endpointSel, data);
    const pt = result.promptTokens;
    const ct = result.completionTokens;
    const tt = result.totalTokens;
    const cached = result.cached;

    // Display stats
    $('promptTokens').textContent = pt;
    $('cachedTokens').textContent = cached;
    $('completionTokens').textContent = ct;
    $('totalTokens').textContent = tt;

    // Cache hit badge logic
    const isHit = cached > 0;
    setBadge($('hitBadge'), isHit ? `cache hit: ${cached}` : 'cache miss', isHit ? 'hit' : 'miss');

    // Cache hit %
    const hitPct = (typeof pt === 'number' && pt > 0) ? ((cached/pt)*100).toFixed(1) + '%' : '—';
    $('hitPct').textContent = hitPct;

    // Show response text (fall back to the raw body when no text was found)
    $('output').textContent = result.text || JSON.stringify(data, null, 2);

    // Log line (concise)
    const ts = nowISO();
    logLine(`[${ts}] label="${label}" endpoint=${endpointSel} model=${model}` +
            (pck ? ` pck="${pck}"` : '') +
            ` | latency=${latencyMs}ms tokens: prompt=${pt} cached=${cached} completion=${ct} total=${tt} | ` +
            `hit=${cached>0} | Q="${$('question').value.trim().slice(0,80)}"`);
  } catch (e) {
    // Network failure or an unexpected response shape.
    $('output').textContent = 'Error: ' + (e?.message || e);
    setResJson({ error: String(e) });
    setBadge($('hitBadge'), 'error', 'miss');
    logLine(`[${nowISO()}] ERROR ${e?.message || e}`);
  }
}

/** UI wires: connect the action buttons to their handlers. */
$('sendBtn').addEventListener('click', () => send('normal'));
$('sendVariantBtn').addEventListener('click', () => send('variant'));
$('clearLogBtn').addEventListener('click', () => { $('runLog').textContent=''; });
$('copyLogBtn').addEventListener('click', async () => {
  // The clipboard write can fail: navigator.clipboard is undefined outside
  // secure contexts, and writeText() rejects when permission is denied.
  // Report the failure instead of leaving an unhandled rejection.
  try {
    await navigator.clipboard.writeText($('runLog').textContent);
    alert('Log copied to clipboard.');
  } catch (e) {
    alert('Could not copy the log: ' + (e?.message || e));
  }
});

/** Startup: load the starter scenario into the form, then write the opening log entry. */
function init() {
  loadScenario('starter');
  const banner = `[${nowISO()}] Session started. Try A → A2 for a cache hit (same prefix, new question). Then A → B to see instruction-only prefix hits. Caches typically persist ~5–10 minutes idle (sometimes up to ~1 hour off-peak).`;
  logLine(banner);
}
init();
</script>
</body>
</html>
